##############################################################################################################
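# This script takes a CSV file as input where one column is a date.
# It generates a CSV output file with multiple columns, where the date (for each row) is processed to get:
#	- year
#	- month
#	- day
#	- hours
#	- minutes
#	- seconds
#	- yearMonth
#	- yearMonthDay
#	- dayOfWeek
#	- secondsSinceRef
#	- dayNum
#	- weekNum
# (monthNum is not currently generated.)
#
# secondsSinceRef, dayNum and weekNum are relative to a reference date, which is passed as an optional extra
# argument written in the same format as the input dates. When it is omitted, the Unix epoch (expressed in
# local time) is used as the reference.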
#
# EXAMPLE USE:
#  python makeDateTimeFields.py input.csv output.csv 1 '%Y-%m-%d'
#  python makeDateTimeFields.py input.csv output.csv 1 '%Y-%m-%d' '1987-12-07'
##############################################################################################################

import sys
from datetime import datetime
from datetime import date
import time
from myutilities import printRemainingTime, readDataFromFile, appendData2File

# Number of processed rows buffered in memory before they are appended to the output file
FLUSH_EVERY = 10
# Number of processed rows between two progress reports
PROGRESS_EVERY = 100000
# Header line of the output file; the names follow the order of the list returned by makeDateTimeFields()
OUTPUT_HEADER = 'year,month,day,hours,minutes,seconds,yearMonth,yearMonthDay,dayOfWeek,secondsSinceRef,dayNum,weekNum\n'


def makeDateTimeFields(dateObj, dateRefObj):
    """Return the list of output fields derived from one date.

    Args:
        dateObj: datetime of the row being processed.
        dateRefObj: reference datetime used for the relative fields.

    Returns:
        A list, in the same order as OUTPUT_HEADER:
        [year, month, day, hours, minutes, seconds, yearMonth, yearMonthDay,
         dayOfWeek, secondsSinceRef, dayNum, weekNum].
        The first nine items are strings produced by strftime ('%w' is used
        for dayOfWeek, so Sunday is '0'); secondsSinceRef is a float; dayNum
        and weekNum are integers.
    """
    year = dateObj.strftime('%Y')
    month = dateObj.strftime('%m')
    day = dateObj.strftime('%d')
    hours = dateObj.strftime('%H')
    minutes = dateObj.strftime('%M')
    seconds = dateObj.strftime('%S')
    yearMonth = year + month
    yearMonthDay = yearMonth + day
    dayOfWeek = dateObj.strftime('%w')

    diff = dateObj - dateRefObj
    secondsSinceRef = diff.total_seconds()
    # Index starts at 0 --> day 0 is the first 24 hours starting at the instant dateRef.
    # timedelta.days is floored, so dates before the reference get negative indices (-1, -2, ...).
    dayNum = diff.days
    # Index starts at 0 --> week 0 is the first (7*24) hours starting at the instant dateRef.
    weekNum = dayNum // 7

    return [year, month, day, hours, minutes, seconds, yearMonth, yearMonthDay,
            dayOfWeek, secondsSinceRef, dayNum, weekNum]


def main(argv):
    """Run the conversion described in the file header.

    Args:
        argv: command-line argument list laid out like sys.argv, i.e.
            [script, inputFile, outputFile, dateColIdx, inputDateFormat, (dateRef)].

    Raises:
        SystemExit: if fewer than four arguments follow the script name.
        ValueError: if dateColIdx is not an integer or an input date does not
            match inputDateFormat.
    """
    # Read arguments
    if len(argv) < 5:
        sys.exit('Usage: python makeDateTimeFields.py inputFile outputFile dateColIdx inputDateFormat [dateRef]')
    inputFile, outputFile, dateColIdx, inputDateFormat = argv[1:5]
    dateColIdx = int(dateColIdx)

    # NOTE(review): fromtimestamp(0) is the Unix epoch converted to local time, so the default
    # reference depends on the timezone of the machine running the script.
    dateRefObj = datetime.fromtimestamp(0)
    if len(argv) > 5:
        try:
            dateRefObj = datetime.strptime(argv[5], inputDateFormat)
        except ValueError:
            # Keep the historical fallback to the epoch, but do not hide the problem from the user
            sys.stderr.write("WARNING: reference date '%s' does not match format '%s'; using the epoch instead\n"
                             % (argv[5], inputDateFormat))

    # Read input file (the meaning of the ',' and True arguments is defined by myutilities.readDataFromFile;
    # presumably the delimiter and a header flag -- confirm against that helper)
    [dates] = readDataFromFile(inputFile, ',', True, [dateColIdx])

    # Prepare output file: (re)create it with only the header line; rows are appended afterwards
    with open(outputFile, 'w') as fout:
        fout.write(OUTPUT_HEADER)

    pendingRows = []
    numDates = len(dates)
    tic = time.time()
    for count, dateStr in enumerate(dates, 1):
        dateObj = datetime.strptime(dateStr, inputDateFormat)
        pendingRows.append(makeDateTimeFields(dateObj, dateRefObj))
        if count % FLUSH_EVERY == 0:
            appendData2File(pendingRows, outputFile, ',')
            pendingRows = []
        if count % PROGRESS_EVERY == 0:
            printRemainingTime(tic, count, numDates)

    # Flush the rows left over after the last full batch (nothing to do if the buffer is empty)
    if pendingRows:
        appendData2File(pendingRows, outputFile, ',')


if __name__ == '__main__':
    main(sys.argv)
